GeoDataFrame
This notebook is a team project from the Python Intermediate Course. It shows how to import data from GitHub, convert it to a pandas DataFrame and a GeoDataFrame, check for missing values, manipulate and merge data, and build interactive maps for each exercise.
#!pip install folium
# Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import chardet
import folium as fm
from folium import Marker, GeoJson
from folium.plugins import MarkerCluster, HeatMap, StripePattern
import geopandas as gpd
from geopandas import GeoSeries
from shapely.geometry import Point, LineString
import branca as br
from IPython.display import display, HTML
# Inject CSS that sets the widths of the notebook, menubar and main-toolbar
# containers. A stray "a" that followed the last rule was removed so the
# stylesheet contains only valid rules.
display(HTML(data="""
<style>
div#notebook-container { width: 95%; }
div#menubar-container { width: 65%; }
div#maintoolbar-container { width: 99%; }
</style>
"""))
1. Import the data located at this link. It contains each tech institute's total vacancies, total applicants, total entrants, and total enrolled students. The institutes are also geolocated.
# Load the institutes dataset directly from the course's GitHub repository.
inst = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/alexanderquispe/Diplomado_PUCP/main/_data/institutos1.csv"
)
# Leave the frame as the last expression so the notebook renders it.
inst
| cod_mod | ltimoden_metaatencion | cuentadeid_postulante_procesoadm | sumaden_flagingresante | sumaden_flagmatriculado | ratio | ratio1 | dif | dif1 | nlat_ie | ... | population_ccpp | altitude | Dblock1km | Dblock2km | Dblock3km | Dblock5km | Dblock10km | Dblock20km | Dblock30km | _merge | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 207597 | 307 | 481 | 302 | 301 | 0.980456 | 1.566775 | -6 | 174 | -13.52107 | ... | 106400.0 | 3386.0 | 14840369 | 742184 | 494123 | 296073 | 148036 | 7418 | 4912 | matched (3) |
| 1 | 207613 | 240 | 502 | 204 | 204 | 0.850000 | 2.091667 | -36 | 262 | -13.63997 | ... | 45864.0 | 2342.0 | 14970470 | 748235 | 499156 | 299094 | 149047 | 7423 | 4915 | matched (3) |
| 2 | 239970 | 537 | 1047 | 484 | 476 | 0.886406 | 1.949721 | -61 | 510 | -15.47827 | ... | 216716.0 | 3831.0 | 17000163 | 850081 | 566054 | 340032 | 170016 | 8508 | 5605 | matched (3) |
| 3 | 262311 | 423 | 526 | 353 | 349 | 0.825059 | 1.243499 | -74 | 103 | -6.23041 | ... | 23202.0 | 2358.0 | 6781025 | 339512 | 226341 | 135205 | 67102 | 3351 | 2234 | matched (3) |
| 4 | 273979 | 425 | 544 | 411 | 405 | 0.952941 | 1.280000 | -20 | 119 | -6.49004 | ... | 67362.0 | 309.0 | 7070857 | 353428 | 235285 | 141171 | 70085 | 3542 | 2328 | matched (3) |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 210 | 1468594 | 1566 | 250 | 241 | 202 | 0.128991 | 0.159642 | -1364 | -1316 | -12.02987 | ... | 579561.0 | 125.0 | 13190935 | 659467 | 439311 | 263187 | 131093 | 6546 | 4331 | matched (3) |
| 211 | 1468610 | 284 | 74 | 71 | 71 | 0.250000 | 0.260563 | -213 | -210 | -12.08215 | ... | 66171.0 | 115.0 | 13250934 | 662467 | 441311 | 265186 | 132093 | 6646 | 4431 | matched (3) |
| 212 | 1545623 | 210 | 266 | 184 | 177 | 0.842857 | 1.266667 | -33 | 56 | -16.13940 | ... | 1175.0 | 3780.0 | 17740208 | 887104 | 591069 | 354041 | 177020 | 8810 | 5906 | matched (3) |
| 213 | 1595495 | 206 | 144 | 141 | 137 | 0.665049 | 0.699029 | -69 | -62 | -18.01370 | ... | 86095.0 | 577.0 | 19810177 | 990088 | 660059 | 396035 | 198017 | 9908 | 6605 | matched (3) |
| 214 | 1637859 | 180 | 134 | 125 | 125 | 0.694444 | 0.744444 | -55 | -46 | -13.63631 | ... | 45864.0 | 2392.0 | 14970469 | 748234 | 499156 | 299093 | 149046 | 7423 | 4915 | matched (3) |
215 rows × 21 columns
# List every available column before keeping only the ones that are needed.
print(inst.columns.tolist())
['cod_mod', 'ltimoden_metaatencion', 'cuentadeid_postulante_procesoadm', 'sumaden_flagingresante', 'sumaden_flagmatriculado', 'ratio', 'ratio1', 'dif', 'dif1', 'nlat_ie', 'nlong_ie', 'population_ccpp', 'altitude', 'Dblock1km', 'Dblock2km', 'Dblock3km', 'Dblock5km', 'Dblock10km', 'Dblock20km', 'Dblock30km', '_merge']
# Keep only the institute code, the four admission counts and the coordinates.
# `DataFrame.drop` returns a new frame, so the result is assigned back to
# `inst`; without the assignment the unwanted columns were never removed.
# errors="ignore" lets the cell be re-run after the columns are already gone.
inst = inst.drop(
    columns=[
        'ratio', 'ratio1', 'dif', 'dif1', 'altitude',
        'Dblock1km', 'Dblock2km', 'Dblock3km', 'Dblock5km',
        'population_ccpp', 'Dblock10km', 'Dblock20km', 'Dblock30km',
        '_merge',
    ],
    errors="ignore",
)
# Leave the trimmed frame as the last expression so the notebook renders it.
inst
| cod_mod | ltimoden_metaatencion | cuentadeid_postulante_procesoadm | sumaden_flagingresante | sumaden_flagmatriculado | nlat_ie | nlong_ie | |
|---|---|---|---|---|---|---|---|
| 0 | 207597 | 307 | 481 | 302 | 301 | -13.52107 | -71.97748 |
| 1 | 207613 | 240 | 502 | 204 | 204 | -13.63997 | -72.88261 |
| 2 | 239970 | 537 | 1047 | 484 | 476 | -15.47827 | -70.12756 |
| 3 | 262311 | 423 | 526 | 353 | 349 | -6.23041 | -77.86943 |
| 4 | 273979 | 425 | 544 | 411 | 405 | -6.49004 | -76.36539 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 210 | 1468594 | 1566 | 250 | 241 | 202 | -12.02987 | -77.05831 |
| 211 | 1468610 | 284 | 74 | 71 | 71 | -12.08215 | -77.05295 |
| 212 | 1545623 | 210 | 266 | 184 | 177 | -16.13940 | -70.53580 |
| 213 | 1595495 | 206 | 144 | 141 | 137 | -18.01370 | -70.25080 |
| 214 | 1637859 | 180 | 134 | 125 | 125 | -13.63631 | -72.87916 |
215 rows × 7 columns
# Read the department-boundary shapefile (INEI_LIMITE_DEPARTAMENTAL).
# The path is relative to the working directory, not a GitHub URL, so the
# file has to exist locally at this location for the read to succeed.
shp_dpt = gpd.read_file(
    filename=r'../../_data/INEI_LIMITE_DEPARTAMENTAL/INEI_LIMITE_DEPARTAMENTAL.shp'
)
# Leave the frame as the last expression so the notebook renders it.
shp_dpt
---------------------------------------------------------------------------
CPLE_OpenFailedError Traceback (most recent call last)
fiona\ogrext.pyx in fiona.ogrext.gdal_open_vector()
fiona\_err.pyx in fiona._err.exc_wrap_pointer()
CPLE_OpenFailedError: ../../_data/INEI_LIMITE_DEPARTAMENTAL/INEI_LIMITE_DEPARTAMENTAL.shp: No such file or directory
During handling of the above exception, another exception occurred:
DriverError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_16236\3795121322.py in <module>
1 #importando la data del github
----> 2 shp_dpt= gpd.read_file(r'../../_data/INEI_LIMITE_DEPARTAMENTAL/INEI_LIMITE_DEPARTAMENTAL.shp')
3 shp_dpt
~\anaconda3\lib\site-packages\geopandas\io\file.py in _read_file(filename, bbox, mask, rows, engine, **kwargs)
257
258 if engine == "fiona":
--> 259 return _read_file_fiona(
260 path_or_bytes, from_bytes, bbox=bbox, mask=mask, rows=rows, **kwargs
261 )
~\anaconda3\lib\site-packages\geopandas\io\file.py in _read_file_fiona(path_or_bytes, from_bytes, bbox, mask, rows, where, **kwargs)
301
302 with fiona_env():
--> 303 with reader(path_or_bytes, **kwargs) as features:
304 crs = features.crs_wkt
305 # attempt to get EPSG code
~\anaconda3\lib\site-packages\fiona\env.py in wrapper(*args, **kwds)
455
456 with env_ctor(session=session):
--> 457 return f(*args, **kwds)
458
459 return wrapper
~\anaconda3\lib\site-packages\fiona\__init__.py in open(fp, mode, driver, schema, crs, encoding, layer, vfs, enabled_drivers, crs_wkt, allow_unsupported_drivers, **kwargs)
363
364 if mode in ("a", "r"):
--> 365 colxn = Collection(
366 path,
367 mode,
~\anaconda3\lib\site-packages\fiona\collection.py in __init__(self, path, mode, driver, schema, crs, encoding, layer, vsi, archive, enabled_drivers, crs_wkt, ignore_fields, ignore_geometry, include_fields, wkt_version, allow_unsupported_drivers, **kwargs)
232 if self.mode == "r":
233 self.session = Session()
--> 234 self.session.start(self, **kwargs)
235 elif self.mode in ("a", "w"):
236 self.session = WritingSession()
fiona\ogrext.pyx in fiona.ogrext.Session.start()
fiona\ogrext.pyx in fiona.ogrext.gdal_open_vector()
DriverError: ../../_data/INEI_LIMITE_DEPARTAMENTAL/INEI_LIMITE_DEPARTAMENTAL.shp: No such file or directory
# Turn each institute into a point (longitude = x, latitude = y) in EPSG:4326.
inst_geo = gpd.GeoDataFrame(
    inst,
    geometry=gpd.points_from_xy(x=inst["nlong_ie"], y=inst["nlat_ie"]),
    crs="EPSG:4326",
)

# Intersect the institute points with the department polygons so that every
# point row carries the attributes of the department it falls in.
intersect_inst_geo = gpd.overlay(df1=inst_geo, df2=shp_dpt, how="intersection")

# Variables: admission target, total applicants, total entrants, total enrolled.
vars_sum = [
    'ltimoden_metaatencion',
    'cuentadeid_postulante_procesoadm',
    'sumaden_flagingresante',
    'sumaden_flagmatriculado',
]

# Sum the four variables per department code (CCDD).
grouped_by_dpt = intersect_inst_geo.groupby(['CCDD'], as_index=False)
tot_dpt = grouped_by_dpt[vars_sum].sum()

# Attach the totals to the department geometries, matching on CCDD.
tot_dpt_shp = shp_dpt.merge(right=tot_dpt, on='CCDD')
tot_dpt_shp
| OBJECTID_1 | OBJECTID | CCDD | NOMBDEP | CAPITAL | Shape_STAr | Shape_STLe | ORIG_FID | Shape_Leng | Shape_Area | CORREO | CONTACTO | geometry | ltimoden_metaatencion | cuentadeid_postulante_procesoadm | sumaden_flagingresante | sumaden_flagmatriculado | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1.0 | 01 | AMAZONAS | CHACHAPOYAS | 3.203006 | 12.912088 | 0 | 12.912088 | 3.203006 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... | 1213 | 1346 | 925 | 917 |
| 1 | 2 | 2.0 | 02 | ANCASH | HUARAZ | 2.954592 | 11.780424 | 1 | 11.780424 | 2.954592 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-77.64697 -8.05086, -77.64689 -8.051... | 2969 | 4007 | 2535 | 2501 |
| 2 | 3 | 3.0 | 03 | APURIMAC | ABANCAY | 1.765933 | 7.730154 | 2 | 7.730154 | 1.765933 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-73.74655 -13.17442, -73.74570 -13.1... | 1647 | 2997 | 1506 | 1506 |
| 3 | 4 | 4.0 | 04 | AREQUIPA | AREQUIPA | 5.330203 | 17.405040 | 3 | 17.405040 | 5.330203 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-71.98109 -14.64062, -71.98093 -14.6... | 3000 | 2829 | 1976 | 1941 |
| 4 | 5 | 5.0 | 05 | AYACUCHO | AYACUCHO | 3.643705 | 17.127166 | 4 | 17.127166 | 3.643705 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-74.34843 -12.17503, -74.35000 -12.1... | 3107 | 3824 | 2406 | 2374 |
| 5 | 6 | 6.0 | 06 | CAJAMARCA | CAJAMARCA | 2.684527 | 12.397424 | 5 | 12.397424 | 2.684527 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-78.22182 -7.76346, -78.22233 -7.763... | 5943 | 7744 | 5104 | 5068 |
| 6 | 7 | 7.0 | 07 | CALLAO | CALLAO | 0.011738 | 1.111221 | 6 | 1.111221 | 0.011738 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-77.13504 -11.81820, -77.13484 -11.8... | 780 | 589 | 509 | 506 |
| 7 | 8 | 8.0 | 08 | CUSCO | CUSCO | 6.000331 | 21.794434 | 7 | 21.794434 | 6.000331 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-72.97280 -11.25189, -72.97134 -11.2... | 6441 | 6575 | 4758 | 4694 |
| 8 | 9 | 9.0 | 09 | HUANCAVELICA | HUANCAVELICA | 1.839851 | 9.561245 | 8 | 9.561245 | 1.839851 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-74.57118 -11.98710, -74.57095 -11.9... | 705 | 1694 | 705 | 646 |
| 9 | 10 | 10.0 | 10 | HUANUCO | HUANUCO | 3.089811 | 14.589840 | 9 | 14.589840 | 3.089811 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-75.99200 -8.32108, -75.99015 -8.321... | 1599 | 2360 | 1482 | 1463 |
| 10 | 11 | 11.0 | 11 | ICA | ICA | 1.767025 | 10.374368 | 10 | 10.374368 | 1.767025 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-75.61136 -12.96649, -75.61103 -12.9... | 2882 | 1451 | 1248 | 1199 |
| 11 | 12 | 12.0 | 12 | JUNIN | HUANCAYO | 3.658881 | 14.314727 | 11 | 14.314727 | 3.658881 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-75.03564 -10.68232, -75.03436 -10.6... | 4864 | 4042 | 3075 | 2945 |
| 12 | 13 | 13.0 | 13 | LA LIBERTAD | TRUJILLO | 2.068780 | 13.148266 | 12 | 13.148266 | 2.068780 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-77.75753 -6.96579, -77.75638 -6.967... | 5708 | 4387 | 3230 | 3132 |
| 13 | 14 | 14.0 | 14 | LAMBAYEQUE | CHICLAYO | 1.184994 | 6.685351 | 13 | 6.685351 | 1.184994 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-79.88373 -5.49283, -79.88265 -5.494... | 2671 | 1936 | 1769 | 1752 |
| 14 | 15 | 15.0 | 15 | LIMA | LIMA | 2.904475 | 12.745226 | 14 | 12.745226 | 2.904475 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | MULTIPOLYGON (((-76.79732 -12.47686, -76.79732... | 20885 | 9159 | 7847 | 6832 |
| 15 | 19 | 16.0 | 16 | LORETO | IQUITOS | 30.550256 | 46.701789 | 15 | 46.701789 | 30.550256 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-75.10004 -0.08096, -75.09485 -0.081... | 2106 | 3249 | 1841 | 1709 |
| 16 | 20 | 17.0 | 17 | MADRE DE DIOS | PUERTO MALDONADO | 7.023854 | 15.416397 | 16 | 15.416397 | 7.023854 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-70.61346 -10.12964, -70.61359 -10.2... | 292 | 307 | 257 | 266 |
| 17 | 21 | 18.0 | 18 | MOQUEGUA | MOQUEGUA | 1.334888 | 7.720424 | 17 | 7.720424 | 1.334888 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-70.79388 -15.98721, -70.79486 -15.9... | 508 | 566 | 452 | 431 |
| 18 | 22 | 19.0 | 19 | PASCO | CERRO DE PASCO | 1.972864 | 10.891698 | 18 | 10.891698 | 1.972864 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-74.68005 -9.44727, -74.67872 -9.448... | 802 | 964 | 692 | 680 |
| 19 | 23 | 20.0 | 20 | PIURA | PIURA | 2.899223 | 11.299471 | 19 | 11.299471 | 2.899223 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-81.02621 -4.10594, -81.02557 -4.106... | 1010 | 1094 | 759 | 755 |
| 20 | 24 | 21.0 | 21 | PUNO | PUNO | 5.666684 | 26.709721 | 20 | 26.709721 | 5.666684 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | MULTIPOLYGON (((-68.81422 -16.34799, -68.91839... | 4455 | 5114 | 2904 | 2798 |
| 21 | 31 | 22.0 | 22 | SAN MARTIN | MOYOBAMBA | 4.174664 | 16.162954 | 21 | 16.162954 | 4.174664 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-77.71678 -5.41597, -77.71270 -5.416... | 1747 | 2327 | 1660 | 1649 |
| 22 | 32 | 23.0 | 23 | TACNA | TACNA | 1.362022 | 6.088704 | 22 | 6.088704 | 1.362022 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-70.26604 -16.77955, -70.26474 -16.7... | 846 | 887 | 667 | 662 |
| 23 | 33 | 24.0 | 24 | TUMBES | TUMBES | 0.378757 | 3.787977 | 23 | 3.787977 | 0.378757 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-80.28521 -3.41276, -80.28406 -3.412... | 390 | 483 | 336 | 223 |
| 24 | 34 | 25.0 | 25 | UCAYALI | PUCALLPA | 8.660111 | 27.470159 | 24 | 27.470159 | 8.660111 | juan.suyo@geogpsperu.com | www.geogpsperu.com | 931381206 | POLYGON ((-74.47145 -7.27617, -74.47052 -7.277... | 907 | 1200 | 754 | 744 |
# Centre the base map on the mean latitude/longitude of all institutes.
zoom_start = 5.5
lat_inst = inst["nlat_ie"].mean()
lon_inst = inst["nlong_ie"].mean()
a = fm.Map(
    location=[lat_inst, lon_inst],
    tiles="OpenStreetMap",
    zoom_start=zoom_start,
    control_scale=True,
)

# Shade each department by its summed admission target
# (`ltimoden_metaatencion`), joining data rows to polygons through CCDD.
fm.Choropleth(
    geo_data=shp_dpt,
    data=tot_dpt_shp,
    columns=['CCDD', 'ltimoden_metaatencion'],
    key_on="feature.properties.CCDD",
    fill_color="YlOrRd",
    fill_opacity=0.8,
    line_opacity=0.2,
    # The legend now names the plotted variable; it previously read
    # "Poverty Rate (%)", which does not match the column being mapped.
    legend_name="Total vacancies (admission target)",
    smooth_factor=0,
    # folium's keyword is lowercase `highlight`; the capitalised spelling
    # did not enable the hover highlight.
    highlight=True,
    # NOTE(review): "#0000" is a 4-digit hex colour (RGBA shorthand with zero
    # alpha), which would make borders transparent - confirm whether
    # "#000000" was intended.
    line_color="#0000",
    overlay=True,
    nan_fill_color="White",  # fill departments with missing values in white
).add_to(a)

# Leave the map as the last expression so the notebook renders it.
a
Make this Notebook Trusted to load map: File -> Trust Notebook